import pandas as pd
import seaborn as sns

# Load the AB_NYC_2019 CSV (expected in the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="AB_NYC_2019.csv")

# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(48906, 16)

# Preview the first five rows.
df.head(n=5)

# Preview the last five rows.
df.tail(n=5)

# Draw five rows at random; no seed is set, so the selection differs between runs.
df.sample(n=5)

# Print each column's non-null count and dtype, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48906 entries, 0 to 48905
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48906 non-null  int64  
 1   name                            48890 non-null  object 
 2   host_id                         48906 non-null  int64  
 3   host_name                       48885 non-null  object 
 4   neighbourhood_group             48906 non-null  object 
 5   neighbourhood                   48906 non-null  object 
 6   latitude                        48906 non-null  float64
 7   longitude                       48906 non-null  float64
 8   room_type                       48906 non-null  object 
 9   price                           48906 non-null  int64  
 10  minimum_nights                  48906 non-null  int64  
 11  number_of_reviews               48906 non-null  int64  
 12  last_review                     38854 non-null  object 
 13  reviews_per_month               38854 non-null  float64
 14  calculated_host_listings_count  48906 non-null  int64  
 15  availability_365                48906 non-null  int64  
dtypes: float64(3), int64(7), object(6)
memory usage: 6.0+ MB

# Count missing values per column (sum the boolean NaN mask down the rows).
df.isna().sum(axis=0)

id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

# Number of rows that are exact repeats of an earlier row across all columns.
df.duplicated(keep="first").sum()

11

# Number of distinct values in each column; missing values are not counted.
df.nunique(axis=0, dropna=True)

id                                48895
name                              47896
host_id                           37457
host_name                         11452
neighbourhood_group                   5
neighbourhood                       221
latitude                          19048
longitude                         14718
room_type                             3
price                               674
minimum_nights                      109
number_of_reviews                   394
last_review                        1764
reviews_per_month                   937
calculated_host_listings_count       47
availability_365                    366
dtype: int64

# Distinct values of the neighbourhood_group column, in order of first appearance.
pd.unique(df["neighbourhood_group"])

array(['Brooklyn', 'Manhattan', 'Queens', 'Staten Island', 'Bronx'],
      dtype=object)

# Vertical box plot of the price column; points drawn beyond the whiskers are
# candidate outliers.
# The column is bound to ``y`` by keyword because a lone positional argument is
# interpreted differently across seaborn releases (as ``x`` in older versions,
# as wide-form ``data`` in newer ones), which would flip the orientation.
sns.boxplot(data=df, y="price")

<Axes: ylabel='price'>

# Vertical box plot of the availability_365 column; points drawn beyond the
# whiskers are candidate outliers.
# The column is bound to ``y`` by keyword because a lone positional argument is
# interpreted differently across seaborn releases (as ``x`` in older versions,
# as wide-form ``data`` in newer ones), which would flip the orientation.
sns.boxplot(data=df, y="availability_365")

<Axes: ylabel='availability_365'>

	id	name	host_id	host_name	neighbourhood_group	neighbourhood	latitude	longitude	room_type	price	minimum_nights	number_of_reviews	last_review	reviews_per_month	calculated_host_listings_count	availability_365
0	2539	Clean & quiet apt home by the park	2787	John	Brooklyn	Kensington	40.64749	-73.97237	Private room	149	1	9	19-10-2018	0.21	6	365
1	2595	Skylit Midtown Castle	2845	Jennifer	Manhattan	Midtown	40.75362	-73.98377	Entire home/apt	225	1	45	21-05-2019	0.38	2	355
2	3647	THE VILLAGE OF HARLEM....NEW YORK !	4632	Elisabeth	Manhattan	Harlem	40.80902	-73.94190	Private room	150	3	0	NaN	NaN	1	365
3	3831	Cozy Entire Floor of Brownstone	4869	LisaRoxanne	Brooklyn	Clinton Hill	40.68514	-73.95976	Entire home/apt	89	1	270	05-07-2019	4.64	1	194
4	5022	Entire Apt: Spacious Studio/Loft by central park	7192	Laura	Manhattan	East Harlem	40.79851	-73.94399	Entire home/apt	80	10	9	19-11-2018	0.10	1	0

	id	name	host_id	host_name	neighbourhood_group	neighbourhood	latitude	longitude	room_type	price	minimum_nights	number_of_reviews	last_review	reviews_per_month	calculated_host_listings_count	availability_365
48901	5441	Central Manhattan/near Broadway	7989	Kate	Manhattan	Hell's Kitchen	40.76076	-73.98867	Private room	85	2	188	23-06-2019	1.50	1	39
48902	5803	Lovely Room 1, Garden, Best Area, Legal rental	9744	Laurie	Brooklyn	South Slope	40.66829	-73.98779	Private room	89	4	167	24-06-2019	1.34	3	314
48903	6021	Wonderful Guest Bedroom in Manhattan for SINGLES	11528	Claudio	Manhattan	Upper West Side	40.79826	-73.96113	Private room	85	2	113	05-07-2019	0.91	1	333
48904	6090	West Village Nest - Superhost	11975	Alina	Manhattan	West Village	40.73530	-74.00525	Entire home/apt	120	90	27	31-10-2018	0.22	1	0
48905	6848	Only 2 stops to Manhattan studio	15991	Allen & Irina	Brooklyn	Williamsburg	40.70837	-73.95352	Entire home/apt	140	2	148	29-06-2019	1.20	1	46

	id	name	host_id	host_name	neighbourhood_group	neighbourhood	latitude	longitude	room_type	price	minimum_nights	number_of_reviews	last_review	reviews_per_month	calculated_host_listings_count	availability_365
22052	17741347	Cozy studio w/kitchen & bathroom. Great location	3435092	Elena	Manhattan	Upper East Side	40.78230	-73.94838	Entire home/apt	105	5	117	23-06-2019	4.29	3	187
46714	35397690	Cozy 3 bedroom apt / Lower East Side	251918661	Alex	Manhattan	Chinatown	40.71678	-73.99492	Entire home/apt	400	4	3	13-06-2019	2.73	1	57
23701	19161510	Affordable and functional room	3788839	Lorenzo & Alex	Brooklyn	Bedford-Stuyvesant	40.68468	-73.92408	Private room	45	28	10	27-05-2019	0.46	4	310
7374	5445314	Beautiful Artist loft	21838149	Marcia	Brooklyn	DUMBO	40.70299	-73.98624	Private room	160	1	54	20-06-2019	1.03	1	10
26952	21354840	BRIGHT & CLEAN UNION SQ GEM APT! 700 SQF ALL Y...	18270371	Ena	Manhattan	Gramercy	40.73305	-73.98574	Entire home/apt	148	4	10	27-09-2018	0.52	1	0

	id	host_id	latitude	longitude	price	minimum_nights	number_of_reviews	reviews_per_month	calculated_host_listings_count	availability_365
count	4.890600e+04	4.890600e+04	48906.000000	48906.000000	48906.000000	48906.000000	48906.000000	38854.000000	48906.000000	48906.000000
mean	1.901287e+07	6.760480e+07	40.728952	-73.952175	152.711324	7.031612	23.300454	1.373151	7.142702	112.782031
std	1.098557e+07	7.860866e+07	0.054529	0.046154	240.128713	20.512489	44.607175	1.680270	32.948926	131.620370
min	2.539000e+03	2.438000e+03	40.499790	-74.244420	0.000000	1.000000	0.000000	0.010000	1.000000	0.000000
25%	9.464662e+06	7.809567e+06	40.690100	-73.983080	69.000000	1.000000	1.000000	0.190000	1.000000	0.000000
50%	1.967545e+07	3.078463e+07	40.723080	-73.955685	106.000000	3.000000	5.000000	0.720000	1.000000	45.000000
75%	2.915085e+07	1.074344e+08	40.763120	-73.936283	175.000000	5.000000	24.000000	2.020000	2.000000	227.000000
max	3.648724e+07	2.743213e+08	40.913060	-73.712990	10000.000000	1250.000000	629.000000	58.500000	327.000000	365.000000

1. What are the dimensions of the data?

2. What does the data look like?

3. What is the data type of each column?

4. Are there any missing values?

5. What does the data look like mathematically (summary statistics)?

6. Are there any duplicate rows?

7. How many unique values are there in each column?

8. Are there any outliers in the numerical columns?